import pandas as pd
import numpy as np
import copy

pd.options.display.max_columns = 125
pd.options.display.max_rows = 1500

# Load data
filepath = '../original/Windham, NH, Forensic Audit May, 2021 Hand Tally Data.xlsx'

#  Original Votes
e1 = pd.read_excel(filepath, sheet_name = 0, header=1)
e1.drop("Unnamed: 24", axis=1, inplace=True)

# Rename columns
e1.rename(columns={"Tally Batch ID": "county"}, inplace = True)

e1.rename(columns={"writein": "write_gov"}, inplace = True)
e1.rename(columns={"under": "under_gov"}, inplace = True)
e1.rename(columns={"over": "over_gov"}, inplace = True)

e1.rename(columns={"writein.1": "write_sen"}, inplace = True)
e1.rename(columns={"under.1": "under_sen"}, inplace = True)
e1.rename(columns={"over.1": "over_sen"}, inplace = True)

e1.rename(columns={"writein.2": "write_rep"}, inplace = True)
e1.rename(columns={"under.2": "under_rep"}, inplace = True)
e1.rename(columns={"over.2": "over_rep"}, inplace = True)

e1 = e1.melt(["county"],["Perry", "Feltes", "Sununu", "write_gov","write_rep",
                         "write_sen", "under_gov","under_rep","under_sen", 
                         "over_gov","over_rep","over_sen", "O'Donnell", "Shaheen", 
                         "Messner", "St. Laurent", "Azibert", "Roman", "Singureanu", 
                         "Soti", "Griffin", "Lynn", "McMahon"], var_name = "candidate")

e1[["original_votes"]] = pd.DataFrame(e1["value"].tolist(), index=e1.index)
e1.drop("value", axis=1, inplace = True)

#  Audited Votes
e2 = pd.read_excel(filepath, sheet_name = 0, header=1)
e2.drop("Unnamed: 24", axis=1, inplace=True)

# Rename columns
e2.rename(columns={"Tally Batch ID": "county1"}, inplace = True)

e2.rename(columns={"writein": "write_gov_audit"}, inplace = True)
e2.rename(columns={"under": "under_gov_audit"}, inplace = True)
e2.rename(columns={"over": "over_gov_audit"}, inplace = True)

e2.rename(columns={"writein.1": "write_sen_audit"}, inplace = True)
e2.rename(columns={"under.1": "under_sen_audit"}, inplace = True)
e2.rename(columns={"over.1": "over_sen_audit"}, inplace = True)

e2.rename(columns={"writein.2": "write_rep_audit"}, inplace = True)
e2.rename(columns={"under.2": "under_rep_audit"}, inplace = True)
e2.rename(columns={"over.2": "over_rep_audit"}, inplace = True)

e2 = e2.melt(["county1"],["Perry", "Feltes", "Sununu", "write_gov_audit",
                         "write_rep_audit","write_sen_audit","under_gov_audit",
                         "under_rep_audit","under_sen_audit", "over_gov_audit",
                         "over_rep_audit","over_sen_audit","O'Donnell", "Shaheen", 
                         "Messner", "St. Laurent", "Azibert", "Roman", "Singureanu", 
                         "Soti", "Griffin", "Lynn", "McMahon"], var_name = "candidate1")

e2[["audited_votes"]] = pd.DataFrame(e2["value"].tolist(), index=e2.index)
e2.drop("value", axis=1, inplace = True)

df = pd.concat([e1, e2], axis = 1)

# Add necessary columns
df["state"] = "NEW HAMPSHIRE"
df["date"] = "2020-11-03"
df["office"] = ''

#  Diff and making floats to ints
df['diff'] = df['audited_votes'] - df['original_votes']

df['original_votes'] = df['original_votes'].fillna(0)
df['audited_votes'] = df['audited_votes'].fillna(0)
df['diff'] = df['diff'].fillna(0)

df = df.astype({'original_votes': 'int64'})
df = df.astype({'audited_votes': 'int64'})
df = df.astype({'diff': 'int64'})

# Renaming candidates and office
df['office'] = np.where((df['candidate'] == 'Perry'), "Governor", df['office'])
df['office'] = np.where((df['candidate'] == 'Feltes'), "Governor", df['office'])
df['office'] = np.where((df['candidate'] == 'Sununu'), "Governor", df['office'])

df['office'] = np.where((df['candidate'] == "O'Donnell"), "US Senate", df['office'])
df['office'] = np.where((df['candidate'] == 'Shaheen'), "US Senate", df['office'])
df['office'] = np.where((df['candidate'] == 'Messner'), "US Senate", df['office'])

df['office'] = np.where((df['candidate'] == 'St. Laurent'), "State Representative", df['office'])
df['office'] = np.where((df['candidate'] == 'Azibert'), "State Representative", df['office'])
df['office'] = np.where((df['candidate'] == 'Roman'), "State Representative", df['office'])
df['office'] = np.where((df['candidate'] == 'Singureanu'), "State Representative", df['office'])
df['office'] = np.where((df['candidate'] == 'Soti'), "State Representative", df['office'])
df['office'] = np.where((df['candidate'] == 'Griffin'), "State Representative", df['office'])
df['office'] = np.where((df['candidate'] == 'Lynn'), "State Representative", df['office'])
df['office'] = np.where((df['candidate'] == 'McMahon'), "State Representative", df['office'])

df['office'] = np.where((df['candidate'] == 'write_gov'), "Governor", df['office'])
df['office'] = np.where((df['candidate'] == 'write_sen'), "US Senate", df['office'])
df['office'] = np.where((df['candidate'] == 'write_rep'), "State Representative", df['office'])

df['office'] = np.where((df['candidate'] == 'under_gov'), "Governor", df['office'])
df['office'] = np.where((df['candidate'] == 'under_sen'), "US Senate", df['office'])
df['office'] = np.where((df['candidate'] == 'under_rep'), "State Representative", df['office'])

df['office'] = np.where((df['candidate'] == 'over_gov'), "Governor", df['office'])
df['office'] = np.where((df['candidate'] == 'over_sen'), "US Senate", df['office'])
df['office'] = np.where((df['candidate'] == 'over_rep'), "State Representative", df['office'])

df['candidate'] = np.where((df['candidate'] == 'write_gov'), "[Write-In]", df['candidate'])
df['candidate'] = np.where((df['candidate'] == 'write_sen'), "[Write-In]", df['candidate'])
df['candidate'] = np.where((df['candidate'] == 'write_rep'), "[Write-In]", df['candidate'])

df['candidate'] = np.where((df['candidate'] == 'under_gov'), "Undervotes", df['candidate'])
df['candidate'] = np.where((df['candidate'] == 'under_sen'), "Undervotes", df['candidate'])
df['candidate'] = np.where((df['candidate'] == 'under_rep'), "Undervotes", df['candidate'])

df['candidate'] = np.where((df['candidate'] == 'over_gov'), "Overvotes", df['candidate'])
df['candidate'] = np.where((df['candidate'] == 'over_sen'), "Overvotes", df['candidate'])
df['candidate'] = np.where((df['candidate'] == 'over_rep'), "Overvotes", df['candidate'])

# Uppercase
df["candidate"] = df["candidate"].str.upper()
df["office"] = df["office"].str.upper()

# Order 
df = df[['county', 'office', 'candidate','original_votes', 'audited_votes', "diff", "state", "date"]]

#County variable is holding tabular numbers
df["subset"] = df["county"]
df.county = "WINDHAM"

# Save the work
df.to_csv('../ready/NH-cleaned.csv', index=False) 
